import org.apache.spark.sql.SparkSession

/**
 * Minimal demonstration of RDD `groupByKey`: groups pair-RDD values by key,
 * then prints the grouped result and the per-key group sizes.
 *
 * NOTE(review): the appName "RDDPartitionExample" does not match this object's
 * purpose — left unchanged since it is a runtime string, but worth renaming.
 */
object Groupbykey {
  def main(args: Array[String]): Unit = {
    // Local session using all available cores; suitable for a standalone example.
    val spark = SparkSession.builder()
      .appName("RDDPartitionExample")
      .master("local[*]")
      .getOrCreate()
    val sc = spark.sparkContext

    try {
      // Pair RDD of (Char, Int): 'a' appears twice, 'b' once, 'c' twice.
      val rdd_1 = sc.parallelize(List(('a', 1), ('a', 2), ('b', 1), ('c', 1), ('c', 1)))

      // groupByKey collects all values per key into an Iterable.
      // (For aggregations like counting, reduceByKey/countByKey avoid the
      // full shuffle of values, but groupByKey is the point of this example.)
      val g_rdd = rdd_1.groupByKey()

      // e.g. (a,CompactBuffer(1, 2)),(b,CompactBuffer(1)),(c,CompactBuffer(1, 1))
      println(g_rdd.collect().mkString(","))

      // Size of each key's group, printed in the same order as above.
      println(g_rdd.map { case (_, values) => values.size }.collect().mkString(","))
    } finally {
      // Fix: the original never released the session; stop it even on failure.
      spark.stop()
    }
  }
}
